# Libraries
library(dplyr)
library(ggplot2)
library(readr)
library(forcats)
library(TSP)
vole_vaso <- c(98,96,94,88,86,82,77,74,70,60,
59,52,50,47,40,35,29,13,6,5)
None!! :-)
mean(vole_vaso)
## [1] 58.05
The mean is 58.05
median(vole_vaso)
## [1] 59.5
The median is 59.5
sd(vole_vaso)
## [1] 29.75244
The SD is 29.75244
IQR(vole_vaso)
## [1] 44.25
The IQR is 44.25
SE of the mean = sd of your sample / the square root of your sample size
vole_se_mean <- sd(vole_vaso) / sqrt(length(vole_vaso))
vole_se_mean
## [1] 6.652849
SE of the mean = 6.652849
The SE tells us how precise our samples are, i.e., how close to the actual population mean our sample mean is.
We can get the upper quartile value of vole vassopressin with
quantile(vole_vaso, probs = 0.75)
## 75%
## 83
75% = 83
# Create sample size object
samp_size <- 10
# Set seed for reproducibility
set.seed(123)
# Run 1 resample using sample size object to get the upper quantile
sample(vole_vaso,
size = samp_size,
replace = TRUE) %>%
quantile(prob = 0.75)
## 75%
## 59.75
The upper quartile of this resample is 59.75
# Set seed for reproducibility
set.seed(123)
# Create data frame
vole_upperq_sims <- data.frame(samp_sizes = 5:20) %>%
# 2c. Use this data frame to get simulated upper quartiles for each sample size using 1,000 simulations
rowwise(samp_sizes) %>%
# replicate
summarize(boot_upperq = replicate(1000,
# bootstrap draws
sample(vole_vaso,
size = samp_sizes,
replace = TRUE) %>%
# & get an IQR
quantile(prob = 0.75)))
# View the output
vole_upperq_sims
## # A tibble: 16,000 x 2
## # Groups: samp_sizes [16]
## samp_sizes boot_upperq
## <int> <dbl>
## 1 5 60
## 2 5 59
## 3 5 86
## 4 5 77
## 5 5 59
## 6 5 70
## 7 5 82
## 8 5 74
## 9 5 82
## 10 5 74
## # … with 15,990 more rows
E.C. Add a red dashed line using geom_vline() or geom_hline() to show where that should be, perhaps.
# Create the plot
iqr_samp_size_plot <- ggplot(data = vole_upperq_sims,
mapping = aes(x = samp_sizes,
y = boot_upperq)) +
# Add geom_point
geom_point() +
# Change x-axis scale to make plot more clear
scale_x_continuous(breaks = round(seq(min(vole_upperq_sims$samp_sizes), max(vole_upperq_sims$samp_sizes), by = 1),1))
# View plot
iqr_samp_size_plot
The best sample size looks to be around 11, since the upper quantile doesn’t seem to vary much from sample size of 11 and up
# Add vline at this sample size:
iqr_samp_size_plot + geom_vline(xintercept = 11, color = "red",
linetype = "dashed")
# Get SE of the upper quantiles
boot_se_upperq <- vole_upperq_sims %>%
group_by(samp_sizes) %>%
summarize(se_upperq = sd(boot_upperq))
# Create the plot
se_upperq_plot <- ggplot(data = boot_se_upperq,
mapping = aes(x = samp_sizes,
y = se_upperq)) +
geom_line() +
# Change x-axis tick marks to make clearer
scale_x_continuous(breaks = round(seq(min(boot_se_upperq$samp_sizes), max(boot_se_upperq$samp_sizes), by = 1),1))
# View it
se_upperq_plot
This plot doesn’t seem to level off as much as the previous one. Even at sample size = 11, the SE still seems to continue to decrease. From this plot, a sample size of around 18 seems better, as the SE sort of levels out here.
(Libraries loaded above)
theme_set(theme_bw(base_size=12))
ice <- read_csv("http://biol607.github.io/homework/data/NH_seaice_extent_monthly_1978_2016.csv") %>%
mutate(Month_Name = factor(Month_Name),
Month_Name = fct_reorder(Month_Name, Month))
ggplot(data = ice,
mapping = aes(x = Month_Name,
y = Extent)) +
geom_boxplot()
# Calculate annual min ice extent
ice <- ice %>%
group_by(Year) %>%
mutate(annual_min_ext = min(Extent)) %>%
ungroup()
# Check ice for new column
head(ice)
## # A tibble: 6 x 7
## Year Month Day Extent Missing Month_Name annual_min_ext
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <dbl>
## 1 1978 11 1 10.7 0 Nov 10.7
## 2 1978 12 1 12.7 0 Dec 10.7
## 3 1979 2 1 15.9 0 Feb 7.23
## 4 1979 3 1 16.6 0 Mar 7.23
## 5 1979 6 1 13.1 0 Jun 7.23
## 6 1979 7 1 11.6 0 Jul 7.23
# Plot it by year
ggplot(data = ice,
mapping = aes(x = Year,
y = annual_min_ext)) +
geom_line()
Right before 1980 (perhaps 1979), the annual minimum extent drops dramatically. It fluctuates each year after that, hovering around 7, and then drops again around 2002
cut_interval(1:10, n = 5)
## [1] [1,2.8] [1,2.8] (2.8,4.6] (2.8,4.6] (4.6,6.4] (4.6,6.4] (6.4,8.2]
## [8] (6.4,8.2] (8.2,10] (8.2,10]
## Levels: [1,2.8] (2.8,4.6] (4.6,6.4] (6.4,8.2] (8.2,10]
ext_by_year_plot <- ggplot(data = ice,
mapping = aes(x = Year,
y = Extent,
color = Month_Name)) +
geom_line()
# View it
ext_by_year_plot
ext_by_year_plot + facet_wrap(~cut_interval(Month, n = 4))
ext_by_month_plot <- ggplot(data = ice,
mapping = aes(x = Month_Name,
y = Extent,
color = Year)) +
geom_line(mapping =
aes(group = Year))
# View it
ext_by_month_plot
# Install and load colorfindr
#install.packages("colorfindr")
library(colorfindr)
# Plot (5000 randomly selected pixels) of aurora borealis
get_colors("https://upload.wikimedia.org/wikipedia/commons/a/aa/Polarlicht_2.jpg") %>%
plot_colors_3d(sample_size = 5000, marker_size = 2.5, color_space = "RGB")
# Set seed
set.seed(123)
# Get colors and create a palette with n = 5
my_palette <- get_colors("https://upload.wikimedia.org/wikipedia/commons/a/aa/Polarlicht_2.jpg") %>%
make_palette(n = 50)
# Sort colors
rgb <- col2rgb(my_palette)
tsp <- as.TSP(dist(t(rgb)))
sol <- solve_TSP(tsp, control = list(repetitions = 1e3))
ordered_cols <- my_palette[sol]
lab <- convertColor(t(rgb), 'sRGB', 'Lab')
ordered_cols2 <- my_palette[order(lab[, 'L'])]
# Plot ordered colors
my_palette_ordered <- ggplot2::qplot(x = 1:50, y = 1, fill = I(ordered_cols2), geom = 'col', width = 1) + ggplot2::theme_void()
# Add different color palette, theme, etc.
ext_by_month_plot <- ext_by_month_plot + scale_color_gradientn(colors = ordered_cols2)
ext_by_month_plot + theme_light() +
labs(title = "Monthly Change in Sea Ice Extent, by Year", x = "Month", y = "Extent")